This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
# Install plotly only when it is missing, so that re-running or knitting the
# document does not reinstall the package from the network every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
## Installing package into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the video statistics from a comma-separated file.
# - strip.white = TRUE trims leading/trailing whitespace from fields.
# - encoding = "UTF-8" marks the strings as UTF-8 (it does not re-encode the
#   input), so non-ASCII titles are treated as characters rather than raw
#   bytes by functions such as nchar(), whatever the session locale is.
#   NOTE(review): assumes the CSV is UTF-8 encoded; confirm for new exports.
video_stats <- read.csv(
  "videos-stats.csv",
  strip.white = TRUE,
  encoding = "UTF-8"
)

# Quick structural checks: dimensions first, then column types and samples.
dim(video_stats)
## [1] 1881 8
str(video_stats)
## 'data.frame': 1881 obs. of 8 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Title : chr "Apple Pay Is Killing the Physical Wallet After Only Eight Years | Tech News Briefing Podcast | WSJ" "The most EXPENSIVE thing I own." "My New House Gaming Setup is SICK!" "Petrol Vs Liquid Nitrogen | Freezing Experiment | \340\264\252\340\265\206\340\264\237\340\265\215\340\264\260\340\265\213\340\ ...
## $ Video.ID : chr "wAZZ-UWGVHI" "b3x28s61q3c" "4mgePWWCAmA" "kXiYSI7H2b0" ...
## $ Published.At: chr "2022-08-23" "2022-08-24" "2022-08-23" "2022-08-23" ...
## $ Keyword : chr "tech" "tech" "tech" "tech" ...
## $ Likes : num 3407 76779 63825 71566 96513 ...
## $ Comments : num 672 4306 3338 1426 5155 ...
## $ Views : num 135612 1758063 1564007 922918 1855644 ...
# Count the missing values in every column (one-row summary, one column per
# input column). across() replaces the superseded summarise_all().
video_stats %>%
  summarise(across(everything(), function(col) sum(is.na(col))))
## X Title Video.ID Published.At Keyword Likes Comments Views
## 1 0 0 0 0 0 2 2 2
# Discard every row that has a missing value in any column.
video_stats <- drop_na(video_stats)
# Derive engagement rates and the title length.
#   LikesPer1k / CommentsPer1k: count per 1,000 views, rounded to 2 decimals.
#     Rows with zero views get NA instead of the Inf/NaN that dividing by
#     zero would produce.
#   TitleLen: length of the title as reported by nchar().
video_stats <- video_stats %>%
  mutate(
    LikesPer1k = round(
      if_else(Views != 0, Likes / (Views / 1000), NA_real_),
      2
    ),
    CommentsPer1k = round(
      if_else(Views != 0, Comments / (Views / 1000), NA_real_),
      2
    ),
    TitleLen = nchar(Title)
  )
# Add the publication year as a factor and parse the date string.
# PubYear is derived first on purpose: it reads the original "YYYY-MM-DD"
# text, which the following expression overwrites with a date-time value.
# tz = "UTC" keeps the parsed timestamps independent of the machine's local
# time zone (date-only strings are otherwise read as local midnight).
video_stats <- video_stats %>%
  mutate(
    PubYear = as.factor(substr(Published.At, 1, 4)),
    Published.At = as.POSIXct(Published.At, format = "%Y-%m-%d", tz = "UTC")
  )
# Bar chart: how many videos were published in each year.
ggplot(video_stats, aes(x = PubYear)) +
  geom_bar(fill = "#765add") +
  labs(
    title = "Number of videos by year",
    x = "Publication Year",
    y = "Count"
  ) +
  theme_minimal()
# Histogram (30 bins) of the title lengths stored in TitleLen.
ggplot(video_stats, aes(x = TitleLen)) +
  geom_histogram(bins = 30, fill = "#765add") +
  labs(
    title = "Distribution of title length",
    x = "Title Length (char)",
    y = "frequency"
  ) +
  theme_minimal()
# Line chart of total comments (in thousands) per keyword and year.
plot1 <- video_stats %>%
  # Total comments per keyword per year; divide by 1000 to change scale.
  group_by(PubYear, Keyword) %>%
  # .groups = "drop" returns an ungrouped result and avoids the
  # "grouped output" message that summarise() otherwise prints.
  summarize(total_comments = sum(Comments) / 1000, .groups = "drop") %>%
  # Colour every series by its keyword.
  ggplot(aes(x = PubYear, y = total_comments, color = Keyword)) +
  # PubYear is a factor, so the line geom needs an explicit group.
  # Grouping by Keyword draws one line per keyword; a constant group
  # (group = 1) would join the points of all keywords into a single path.
  geom_line(aes(group = Keyword)) +
  geom_point(size = 0.5, alpha = 0.5) +
  ylab("Comment Count") +
  xlab("Published Year") +
  labs(title = "Total Comments by Category Overtime (by 1k)") +
  theme_minimal()

# Convert the static ggplot into an interactive plotly graph.
ggplotly(plot1)
# Interactive bubble chart: likes vs comments per 1k views, one marker per
# video, coloured by keyword and sized by view count.
video_stats %>%
  plot_ly(
    x = ~LikesPer1k,
    y = ~CommentsPer1k,
    type = "scatter",
    mode = "markers",
    color = ~Keyword,
    # Name the palette explicitly. Left unset, plotly requests one Set2 colour
    # per keyword from RColorBrewer, which warns once there are more than 8
    # keywords; a named palette is interpolated to the number of levels.
    colors = "Set2",
    # Marker size follows the view count, scaled into the range 5-70.
    size = ~Views,
    sizes = c(5, 70),
    marker = list(sizemode = "diameter", opacity = 0.5),
    # Show only the custom text below when hovering over a marker.
    hoverinfo = "text",
    text = ~paste(
      paste0("Likes per 1k views: ", LikesPer1k),
      paste0("Comments per 1k views: ", CommentsPer1k),
      paste0("Views (100k): ", round(Views / 100000, 2)),
      paste0("Keyword (Category): ", Keyword),
      sep = "<br>"
    )
  ) %>%
  # Chart title, axis labels and legend heading.
  layout(
    title = "Likes VS Comments per 1k Views",
    xaxis = list(title = "Likes per 1k"),
    yaxis = list(title = "Comments per 1k"),
    legend = list(title = list(text = "<b> Keyword </b>"))
  )
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Install flexdashboard only when it is missing, so that re-running or
# knitting the document does not reinstall it from the network every time.
if (!requireNamespace("flexdashboard", quietly = TRUE)) {
  install.packages("flexdashboard")
}
## Installing package into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)